tokenize in All languages combined

IPA: /ˈtoʊ.kən.aɪz/ [General-American] Forms: tokenizes [present, singular, third-person], tokenizing [participle, present], tokenized [participle, past], tokenized [past], tokenise [alternative] [Show additional information ▼] [Hide additional information ▲]

Etymology: Etymology tree English token Proto-Indo-European *-id- Proto-Indo-European *-yéti Proto-Indo-European *-idyéti Proto-Hellenic *-íďďō Ancient Greek -ῐ́ζω (-ĭ́zō) bor. Late Latin -izō der. Middle French -iser bor. Middle English -isen English -ize English tokenize From token + -ize. Etymology templates: {{ety|en|:af|token|-ize|text=+|tree=1}} Etymology tree English token Proto-Indo-European *-id- Proto-Indo-European *-yéti Proto-Indo-European *-idyéti Proto-Hellenic *-íďďō Ancient Greek -ῐ́ζω (-ĭ́zō) bor. Late Latin -izō der. Middle French -iser bor. Middle English -isen English -ize English tokenize From token + -ize. Head templates: {{en-verb}} tokenize (third-person singular simple present tokenizes, present participle tokenizing, simple past and past participle tokenized)

Verb [Portuguese]

Inflected forms

Alternative forms

[Show JSON for postprocessed kaikki.org data shown on this page ▼] [Hide JSON for postprocessed kaikki.org data shown on this page ▲]

{
  "derived": [
    {
      "_dis1": "0 0 0",
      "word": "detokenize"
    },
    {
      "_dis1": "0 0 0",
      "word": "mistokenize"
    },
    {
      "_dis1": "0 0 0",
      "word": "retokenize"
    },
    {
      "_dis1": "0 0 0",
      "word": "tokenizable"
    },
    {
      "_dis1": "0 0 0",
      "word": "tokenization"
    },
    {
      "_dis1": "0 0 0",
      "word": "tokenizer"
    },
    {
      "_dis1": "0 0 0",
      "word": "untokenized"
    }
  ],
  "etymology_templates": [
    {
      "args": {
        "1": "en",
        "2": ":af",
        "3": "token",
        "4": "-ize",
        "text": "+",
        "tree": "1"
      },
      "expansion": "Etymology tree\nEnglish token\nProto-Indo-European *-id-\nProto-Indo-European *-yéti\nProto-Indo-European *-idyéti\nProto-Hellenic *-íďďō\nAncient Greek -ῐ́ζω (-ĭ́zō)bor.\nLate Latin -izōder.\nMiddle French -iserbor.\nMiddle English -isen\nEnglish -ize\nEnglish tokenize\n[Appendix:Glossary#loanword|Borrowed]] from\", \"terms\" : [ { \"children\" : [ { \"keyword_abbrev\" : \"der.\", \"keyword_label\" : \"Derived from\", \"terms\" : [ { \"children\" : [ { \"keyword_abbrev\" : \"bor.\", \"keyword_label\" : \"Borrowed from\", \"terms\" : [ { \"id\" : \"verbal\", \"children\" : [ { \"terms\" : [ { \"children\" : [ { \"terms\" : [ { \"children\" : [ { \"terms\" : [ { \"children\" : [ ], \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-id-\", \"status\" : \"missing\", \"lang\" : \"ine-pro\" }, { \"children\" : [ ], \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-yéti\", \"status\" : \"ok\", \"lang\" : \"ine-pro\" } ], \"keyword_label\" : \"From\", \"is_group\" : true, \"keyword\" : \"affix\" } ], \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-idyéti\", \"status\" : \"inline\", \"lang\" : \"ine-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"lang_name\" : \"Proto-Hellenic\", \"term\" : \"*-íďďō\", \"status\" : \"inline\", \"lang\" : \"grk-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Ancient Greek\", \"term\" : \"-ῐ́ζω\", \"lang\" : \"grc\" } ], \"keyword\" : \"bor\" } ], \"lang_name\" : \"Late Latin\", \"term\" : \"-izō\", \"status\" : \"ok\", \"lang\" : \"la-lat\" } ], \"keyword\" : \"derived\" } ], \"lang_name\" : \"Middle French\", \"term\" : \"-iser\", \"status\" : \"inline\", \"lang\" : \"frm\" } ], \"keyword\" : \"bor\" } ], \"lang_name\" : \"Middle English\", \"term\" : \"-isen\", \"status\" : \"inline\", \"lang\" : \"enm\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], 
\"lang_name\" : \"English\", \"term\" : \"-ize\", \"status\" : \"ok\", \"lang\" : \"en\" } ], \"keyword_label\" : \"From\", \"is_group\" : true, \"keyword\" : \"affix\" } ], \"lang_name\" : \"English\", \"term\" : \"tokenize\", \"status\" : \"ok\", \"lang\" : \"en\" }\" data-lang=\"en\" data-title=\"tokenize\">\nFrom token + -ize.",
      "name": "ety"
    }
  ],
  "etymology_text": "Etymology tree\nEnglish token\nProto-Indo-European *-id-\nProto-Indo-European *-yéti\nProto-Indo-European *-idyéti\nProto-Hellenic *-íďďō\nAncient Greek -ῐ́ζω (-ĭ́zō)bor.\nLate Latin -izōder.\nMiddle French -iserbor.\nMiddle English -isen\nEnglish -ize\nEnglish tokenize\nFrom token + -ize.",
  "forms": [
    {
      "form": "tokenizes",
      "tags": [
        "present",
        "singular",
        "third-person"
      ]
    },
    {
      "form": "tokenizing",
      "tags": [
        "participle",
        "present"
      ]
    },
    {
      "form": "tokenized",
      "tags": [
        "participle",
        "past"
      ]
    },
    {
      "form": "tokenized",
      "tags": [
        "past"
      ]
    },
    {
      "form": "tokenise",
      "tags": [
        "alternative"
      ]
    }
  ],
  "head_templates": [
    {
      "args": {},
      "expansion": "tokenize (third-person singular simple present tokenizes, present participle tokenizing, simple past and past participle tokenized)",
      "name": "en-verb"
    }
  ],
  "lang": "English",
  "lang_code": "en",
  "pos": "verb",
  "related": [
    {
      "_dis1": "0 0 0",
      "word": "tokenism"
    }
  ],
  "senses": [
    {
      "categories": [
        {
          "kind": "other",
          "langcode": "en",
          "name": "Computing",
          "orig": "en:Computing",
          "parents": [],
          "source": "w"
        },
        {
          "_dis": "53 30 17",
          "kind": "other",
          "name": "English entries referencing missing etymons",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "51 30 20",
          "kind": "other",
          "name": "English entries referencing pages with etymology sections missing etymons",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "50 29 21",
          "kind": "other",
          "name": "English entries with etymology texts",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "50 29 21",
          "kind": "other",
          "name": "English entries with etymology trees",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "53 29 18",
          "kind": "other",
          "name": "English entries with etymon",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "57 35 7",
          "kind": "other",
          "name": "English entries with incorrect language header",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "36 27 37",
          "kind": "other",
          "name": "English terms suffixed with -ize",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "54 29 17",
          "kind": "other",
          "name": "Pages using etymon with no ID",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "53 30 17",
          "kind": "other",
          "name": "Pages with etymology trees",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "57 30 13",
          "kind": "other",
          "name": "Pages with etymon",
          "parents": [],
          "source": "w+disamb"
        }
      ],
      "coordinate_terms": [
        {
          "word": "codify"
        },
        {
          "word": "encode"
        }
      ],
      "glosses": [
        "To reduce to a token or set of tokens by lexical analysis."
      ],
      "id": "en-tokenize-en-verb-jT9kOSc9",
      "links": [
        [
          "computing",
          "computing#Noun"
        ],
        [
          "token",
          "token"
        ],
        [
          "lexical analysis",
          "lexical analysis"
        ]
      ],
      "raw_glosses": [
        "(transitive, computing) To reduce to a token or set of tokens by lexical analysis."
      ],
      "tags": [
        "transitive"
      ],
      "topics": [
        "computing",
        "engineering",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ]
    },
    {
      "categories": [
        {
          "kind": "other",
          "langcode": "en",
          "name": "Computing",
          "orig": "en:Computing",
          "parents": [],
          "source": "w"
        },
        {
          "_dis": "36 27 37",
          "kind": "other",
          "name": "English terms suffixed with -ize",
          "parents": [],
          "source": "w+disamb"
        }
      ],
      "glosses": [
        "To substitute sensitive data with meaningless placeholders."
      ],
      "id": "en-tokenize-en-verb--Kj3NOCS",
      "links": [
        [
          "computing",
          "computing#Noun"
        ]
      ],
      "raw_glosses": [
        "(transitive, computing) To substitute sensitive data with meaningless placeholders."
      ],
      "tags": [
        "transitive"
      ],
      "topics": [
        "computing",
        "engineering",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ]
    },
    {
      "categories": [
        {
          "_dis": "36 27 37",
          "kind": "other",
          "name": "English terms suffixed with -ize",
          "parents": [],
          "source": "w+disamb"
        }
      ],
      "glosses": [
        "To treat as a token minority."
      ],
      "id": "en-tokenize-en-verb-bFvBu9Vi",
      "links": [
        [
          "token",
          "token"
        ],
        [
          "minority",
          "minority"
        ]
      ],
      "raw_glosses": [
        "(transitive) To treat as a token minority."
      ],
      "tags": [
        "transitive"
      ]
    }
  ],
  "sounds": [
    {
      "ipa": "/ˈtoʊ.kən.aɪz/",
      "tags": [
        "General-American"
      ]
    }
  ],
  "word": "tokenize"
}

{
  "head_templates": [
    {
      "args": {
        "1": "pt",
        "2": "verb form"
      },
      "expansion": "tokenize",
      "name": "head"
    }
  ],
  "lang": "Portuguese",
  "lang_code": "pt",
  "pos": "verb",
  "senses": [
    {
      "categories": [
        {
          "_dis": "30 16 5 42 7",
          "kind": "other",
          "name": "Pages with 2 entries",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "32 15 3 46 4",
          "kind": "other",
          "name": "Pages with entries",
          "parents": [],
          "source": "w+disamb"
        },
        {
          "_dis": "76 24",
          "kind": "other",
          "name": "Portuguese entries with incorrect language header",
          "parents": [],
          "source": "w+disamb"
        }
      ],
      "form_of": [
        {
          "word": "tokenizar"
        }
      ],
      "glosses": [
        "inflection of tokenizar:",
        "first/third-person singular present subjunctive"
      ],
      "id": "en-tokenize-pt-verb-lEo1u5zf",
      "links": [
        [
          "tokenizar",
          "tokenizar#Portuguese"
        ]
      ],
      "tags": [
        "first-person",
        "form-of",
        "present",
        "singular",
        "subjunctive",
        "third-person"
      ]
    },
    {
      "form_of": [
        {
          "word": "tokenizar"
        }
      ],
      "glosses": [
        "inflection of tokenizar:",
        "third-person singular imperative"
      ],
      "id": "en-tokenize-pt-verb-OZnKiNqG",
      "links": [
        [
          "tokenizar",
          "tokenizar#Portuguese"
        ]
      ],
      "tags": [
        "form-of",
        "imperative",
        "singular",
        "third-person"
      ]
    }
  ],
  "word": "tokenize"
}

[Show JSON for raw wiktextract data ▼] [Hide JSON for raw wiktextract data ▲]

{
  "categories": [
    "English 3-syllable words",
    "English entries referencing missing etymons",
    "English entries referencing pages with etymology sections missing etymons",
    "English entries with etymology texts",
    "English entries with etymology trees",
    "English entries with etymon",
    "English entries with incorrect language header",
    "English lemmas",
    "English terms suffixed with -ize",
    "English verbs",
    "Pages using etymon with no ID",
    "Pages with 2 entries",
    "Pages with entries",
    "Pages with etymology trees",
    "Pages with etymon"
  ],
  "derived": [
    {
      "word": "detokenize"
    },
    {
      "word": "mistokenize"
    },
    {
      "word": "retokenize"
    },
    {
      "word": "tokenizable"
    },
    {
      "word": "tokenization"
    },
    {
      "word": "tokenizer"
    },
    {
      "word": "untokenized"
    }
  ],
  "etymology_templates": [
    {
      "args": {
        "1": "en",
        "2": ":af",
        "3": "token",
        "4": "-ize",
        "text": "+",
        "tree": "1"
      },
      "expansion": "Etymology tree\nEnglish token\nProto-Indo-European *-id-\nProto-Indo-European *-yéti\nProto-Indo-European *-idyéti\nProto-Hellenic *-íďďō\nAncient Greek -ῐ́ζω (-ĭ́zō)bor.\nLate Latin -izōder.\nMiddle French -iserbor.\nMiddle English -isen\nEnglish -ize\nEnglish tokenize\n[Appendix:Glossary#loanword|Borrowed]] from\", \"terms\" : [ { \"children\" : [ { \"keyword_abbrev\" : \"der.\", \"keyword_label\" : \"Derived from\", \"terms\" : [ { \"children\" : [ { \"keyword_abbrev\" : \"bor.\", \"keyword_label\" : \"Borrowed from\", \"terms\" : [ { \"id\" : \"verbal\", \"children\" : [ { \"terms\" : [ { \"children\" : [ { \"terms\" : [ { \"children\" : [ { \"terms\" : [ { \"children\" : [ ], \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-id-\", \"status\" : \"missing\", \"lang\" : \"ine-pro\" }, { \"children\" : [ ], \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-yéti\", \"status\" : \"ok\", \"lang\" : \"ine-pro\" } ], \"keyword_label\" : \"From\", \"is_group\" : true, \"keyword\" : \"affix\" } ], \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-idyéti\", \"status\" : \"inline\", \"lang\" : \"ine-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"lang_name\" : \"Proto-Hellenic\", \"term\" : \"*-íďďō\", \"status\" : \"inline\", \"lang\" : \"grk-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Ancient Greek\", \"term\" : \"-ῐ́ζω\", \"lang\" : \"grc\" } ], \"keyword\" : \"bor\" } ], \"lang_name\" : \"Late Latin\", \"term\" : \"-izō\", \"status\" : \"ok\", \"lang\" : \"la-lat\" } ], \"keyword\" : \"derived\" } ], \"lang_name\" : \"Middle French\", \"term\" : \"-iser\", \"status\" : \"inline\", \"lang\" : \"frm\" } ], \"keyword\" : \"bor\" } ], \"lang_name\" : \"Middle English\", \"term\" : \"-isen\", \"status\" : \"inline\", \"lang\" : \"enm\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], 
\"lang_name\" : \"English\", \"term\" : \"-ize\", \"status\" : \"ok\", \"lang\" : \"en\" } ], \"keyword_label\" : \"From\", \"is_group\" : true, \"keyword\" : \"affix\" } ], \"lang_name\" : \"English\", \"term\" : \"tokenize\", \"status\" : \"ok\", \"lang\" : \"en\" }\" data-lang=\"en\" data-title=\"tokenize\">\nFrom token + -ize.",
      "name": "ety"
    }
  ],
  "etymology_text": "Etymology tree\nEnglish token\nProto-Indo-European *-id-\nProto-Indo-European *-yéti\nProto-Indo-European *-idyéti\nProto-Hellenic *-íďďō\nAncient Greek -ῐ́ζω (-ĭ́zō)bor.\nLate Latin -izōder.\nMiddle French -iserbor.\nMiddle English -isen\nEnglish -ize\nEnglish tokenize\nFrom token + -ize.",
  "forms": [
    {
      "form": "tokenizes",
      "tags": [
        "present",
        "singular",
        "third-person"
      ]
    },
    {
      "form": "tokenizing",
      "tags": [
        "participle",
        "present"
      ]
    },
    {
      "form": "tokenized",
      "tags": [
        "participle",
        "past"
      ]
    },
    {
      "form": "tokenized",
      "tags": [
        "past"
      ]
    },
    {
      "form": "tokenise",
      "tags": [
        "alternative"
      ]
    }
  ],
  "head_templates": [
    {
      "args": {},
      "expansion": "tokenize (third-person singular simple present tokenizes, present participle tokenizing, simple past and past participle tokenized)",
      "name": "en-verb"
    }
  ],
  "lang": "English",
  "lang_code": "en",
  "pos": "verb",
  "related": [
    {
      "word": "tokenism"
    }
  ],
  "senses": [
    {
      "categories": [
        "English transitive verbs",
        "en:Computing"
      ],
      "coordinate_terms": [
        {
          "word": "codify"
        },
        {
          "word": "encode"
        }
      ],
      "glosses": [
        "To reduce to a token or set of tokens by lexical analysis."
      ],
      "links": [
        [
          "computing",
          "computing#Noun"
        ],
        [
          "token",
          "token"
        ],
        [
          "lexical analysis",
          "lexical analysis"
        ]
      ],
      "raw_glosses": [
        "(transitive, computing) To reduce to a token or set of tokens by lexical analysis."
      ],
      "tags": [
        "transitive"
      ],
      "topics": [
        "computing",
        "engineering",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ]
    },
    {
      "categories": [
        "English transitive verbs",
        "en:Computing"
      ],
      "glosses": [
        "To substitute sensitive data with meaningless placeholders."
      ],
      "links": [
        [
          "computing",
          "computing#Noun"
        ]
      ],
      "raw_glosses": [
        "(transitive, computing) To substitute sensitive data with meaningless placeholders."
      ],
      "tags": [
        "transitive"
      ],
      "topics": [
        "computing",
        "engineering",
        "mathematics",
        "natural-sciences",
        "physical-sciences",
        "sciences"
      ]
    },
    {
      "categories": [
        "English transitive verbs"
      ],
      "glosses": [
        "To treat as a token minority."
      ],
      "links": [
        [
          "token",
          "token"
        ],
        [
          "minority",
          "minority"
        ]
      ],
      "raw_glosses": [
        "(transitive) To treat as a token minority."
      ],
      "tags": [
        "transitive"
      ]
    }
  ],
  "sounds": [
    {
      "ipa": "/ˈtoʊ.kən.aɪz/",
      "tags": [
        "General-American"
      ]
    }
  ],
  "word": "tokenize"
}

{
  "categories": [
    "Pages with 2 entries",
    "Pages with entries",
    "Portuguese entries with incorrect language header",
    "Portuguese non-lemma forms",
    "Portuguese terms spelled with K",
    "Portuguese verb forms"
  ],
  "head_templates": [
    {
      "args": {
        "1": "pt",
        "2": "verb form"
      },
      "expansion": "tokenize",
      "name": "head"
    }
  ],
  "lang": "Portuguese",
  "lang_code": "pt",
  "pos": "verb",
  "senses": [
    {
      "form_of": [
        {
          "word": "tokenizar"
        }
      ],
      "glosses": [
        "inflection of tokenizar:",
        "first/third-person singular present subjunctive"
      ],
      "links": [
        [
          "tokenizar",
          "tokenizar#Portuguese"
        ]
      ],
      "tags": [
        "first-person",
        "form-of",
        "present",
        "singular",
        "subjunctive",
        "third-person"
      ]
    },
    {
      "form_of": [
        {
          "word": "tokenizar"
        }
      ],
      "glosses": [
        "inflection of tokenizar:",
        "third-person singular imperative"
      ],
      "links": [
        [
          "tokenizar",
          "tokenizar#Portuguese"
        ]
      ],
      "tags": [
        "form-of",
        "imperative",
        "singular",
        "third-person"
      ]
    }
  ],
  "word": "tokenize"
}

"tokenize" meaning in All languages combined

Verb [English]

Verb [Portuguese]

Inflected forms

Alternative forms